import pandas as pd

from application.core.base_extract_faust import BaseExtractFaust
from application.tasks.article_ch_task.article_ch_data_storage_task import ArticleChDataStorageTask
from application.tasks.article_ch_task.article_ch_field_translation_task import ArticleChFieldTranslationTask
from application.tasks.article_ch_task.article_ch_info_clean_task import ArticleChInfoCleanTask
from application.tasks.print_task import PrintTask


class ArticleChExtract(BaseExtractFaust):
    """Extract flow for storing Chinese-language literature records.

    Declares the flow's name and its list of task instances, and turns the
    incoming payload into a pandas DataFrame. How ``flow_name`` and
    ``pipeline_list`` are consumed is decided by ``BaseExtractFaust``, which
    is not visible in this file.
    """

    # Flow name; the literal reads "Chinese literature storage data flow".
    flow_name = "中文文献入库数据流"

    # Task instances for this flow. Presumably executed in list order by the
    # base class -- TODO confirm against BaseExtractFaust.
    pipeline_list = [
        # Data cleaning
        ArticleChInfoCleanTask(),
        # Field conversion
        ArticleChFieldTranslationTask(),
        # Write to storage
        ArticleChDataStorageTask(),
        # PrintTask(),  # Print (disabled)
    ]

    async def extract(self, input_data):
        """Wrap ``input_data`` in a pandas DataFrame and return it.

        Args:
            input_data: Any value accepted by the ``pd.DataFrame`` constructor.

        Returns:
            The DataFrame built from ``input_data``.
        """
        return pd.DataFrame(input_data)
